import xlrd
import csv

# Committee codes treated as standing committees in each chamber.
# Assignment rows for any other committee still count toward the chamber
# roster but are left out of the per-committee rosters and samples.
house_standing_committees = [
    'HSJU', 'HSWM', 'HSVR', 'HSBU', 'HSIF',
    'HSBA', 'HSFA', 'HSGO', 'HSPW', 'HSAG',
    'HSAS', 'HSSY', 'HSED', 'HSII', 'HSMM',
    'HSAP', 'HSPO', 'HSRU', 'HSSO', 'HSSM',
]

senate_standing_committees = [
    'SSSB', 'SSFR', 'SSBU', 'SSGA', 'SSCM',
    'SSHR', 'SSAS', 'SSFI', 'SSAF', 'SSAP',
    'SSEG', 'SSBK', 'SSEV', 'SSVA', 'SSJU',
]

# Chamber name -> that chamber's list above (the same list objects, not copies).
standing_committees = dict(
    house=house_standing_committees,
    senate=senate_standing_committees,
)

# Numeric committee code (first CSV column, parsed as int) -> committee id
# string (second column).  Every row is parsed, so a header or blank row in
# the file would raise here.
with open("./Dropbox/Coalitions/committee_map.csv") as f:
    committee_map = {
        int(code_row[0]): code_row[1]
        for code_row in csv.reader(f, delimiter=',', quotechar='"')
    }
        
# Party recorded as controlling each chamber, keyed by chamber and then by
# Congress number (80 through 114).  Values are exactly 'Democrat' or
# 'Republican', matching the normalised party labels used for legislators,
# so the two can be compared directly.
#
# The 107th Senate is tricky.  It is called for the Democrats here, but it
# might be worth giving it a special dummy instead.
control_map = {
    'house': {
        80: 'Republican', 81: 'Democrat', 82: 'Democrat', 83: 'Republican', 84: 'Democrat',
        85: 'Democrat', 86: 'Democrat', 87: 'Democrat', 88: 'Democrat', 89: 'Democrat',
        90: 'Democrat', 91: 'Democrat', 92: 'Democrat', 93: 'Democrat', 94: 'Democrat',
        95: 'Democrat', 96: 'Democrat', 97: 'Democrat', 98: 'Democrat', 99: 'Democrat',
        100: 'Democrat', 101: 'Democrat', 102: 'Democrat', 103: 'Democrat', 104: 'Republican',
        105: 'Republican', 106: 'Republican', 107: 'Republican', 108: 'Republican', 109: 'Republican',
        110: 'Democrat', 111: 'Democrat', 112: 'Republican', 113: 'Republican', 114: 'Republican',
    },
    'senate': {
        80: 'Republican', 81: 'Democrat', 82: 'Democrat', 83: 'Republican', 84: 'Democrat',
        85: 'Democrat', 86: 'Democrat', 87: 'Democrat', 88: 'Democrat', 89: 'Democrat',
        90: 'Democrat', 91: 'Democrat', 92: 'Democrat', 93: 'Democrat', 94: 'Democrat',
        95: 'Democrat', 96: 'Democrat', 97: 'Republican', 98: 'Republican', 99: 'Republican',
        100: 'Democrat', 101: 'Democrat', 102: 'Democrat', 103: 'Democrat', 104: 'Republican',
        # 107 was spelled 'Democrats', which never equals the 'Democrat' label used everywhere else.
        105: 'Republican', 106: 'Republican', 107: 'Democrat', 108: 'Republican', 109: 'Republican',
        110: 'Democrat', 111: 'Democrat', 112: 'Democrat', 113: 'Democrat', 114: 'Republican',
    },
}
        

# Lookup tables filled from the legislator CSV files.  Column roles are taken
# from how each column is used below.
legislator_map = {}          # int(column 27) -> bioguide id (column 18); the committee files reference legislators by this number
party = {}                   # bioguide id -> party label (column 7, fusion labels collapsed)
bioguide_to_lis_map = {}     # bioguide id -> LIS id (column 21), senators only
bioguide_to_thomas_map = {}  # bioguide id -> THOMAS id (column 19), zero-padded to 5 characters
thomas_to_bioguide_map = {}  # zero-padded THOMAS id -> bioguide id
lis_to_bioguide_map = {}     # LIS id -> bioguide id, senators only

# Fusion-ticket labels are folded into the plain major-party label.
_PARTY_ALIASES = {
    'Democrat-Liberal': 'Democrat',
    'Republican-Conservative': 'Republican',
}


def _load_legislators(path):
    """Merge one legislators CSV file into the module-level lookup tables.

    Rows that are too short or whose column 27 is not an integer are skipped
    without a message (presumably this is also what skips a header row --
    confirm against the files).  For a senator row (column 4 == 'sen') the
    LIS maps are written before column 27 is parsed, so such a row can leave
    LIS entries behind even when the rest of the row is skipped.

    Later rows, and later files, overwrite earlier entries that share a key.
    """
    with open(path, 'r') as f:
        for row in csv.reader(f, delimiter=',', quotechar='"'):
            try:
                bioguide = row[18]
                if row[4] == 'sen':
                    lis = row[21]
                    bioguide_to_lis_map[bioguide] = lis
                    lis_to_bioguide_map[lis] = bioguide

                legislator_map[int(row[27])] = bioguide
                thomas = row[19].zfill(5)
                bioguide_to_thomas_map[bioguide] = thomas
                thomas_to_bioguide_map[thomas] = bioguide
                party[bioguide] = _PARTY_ALIASES.get(row[7], row[7])
            except (IndexError, ValueError):
                # Only the two failures a malformed row can produce are
                # swallowed; anything else is a real bug and should surface.
                #print(row[1] + ' ' + row[0] + ' missing')
                pass


# Current members first, then historic ones, so historic rows win on key clashes.
_load_legislators('./Dropbox/Coalitions/congress-legislators/legislators-current-edited.csv')
_load_legislators('./Dropbox/Coalitions/congress-legislators/legislators-historic-edited.csv')

# Chamber rosters: all_legislators[chamber][congress] -> set of legislator ids
# seen in any assignment row for that chamber and Congress.
all_legislators = dict((chamber, {}) for chamber in ('house', 'senate'))

# Standing-committee rosters: committees[committee_id][congress] -> set of legislator ids.
committees = dict()

# Running count of assignment rows that could not be processed.
missing_legislators = 0

# sample rows:      [leg_id, chamber, committee_id, congress, reason]
# exit_sample rows: [leg_id, chamber, committee_id, congress, exit_reason]
sample, exit_sample = [], []

# House committee assignments, read from the first worksheet of the 103-114 workbook.
sheet = xlrd.open_workbook("./Dropbox/Coalitions/committees/house_assignments_103-114-edited.xls").sheet_by_index(0)

# Rows 0 and 1 are skipped (presumably title/header rows -- confirm against the workbook).
# range() rather than xrange() so the loop runs on Python 2 and Python 3 alike.
for i in range(2, sheet.nrows):
    try:
        row = sheet.row_values(i)
        # Columns used: 0 = congress, 1 = numeric committee code,
        # 2 = legislator number, 4 = assignment reason code, 12 = exit code.
        reason = int(row[4])
        committee_id = committee_map[int(row[1])]
        congress = int(row[0])
        leg_id = legislator_map[int(row[2])]

        # Any resolvable row puts the member on the chamber roster,
        # whichever committee the row is about.
        all_legislators['house'].setdefault(congress, set()).add(leg_id)

        if committee_id not in house_standing_committees:
            continue

        committees.setdefault(committee_id, {}).setdefault(congress, set()).add(leg_id)

        if reason in (4, 5, 6, 7):
            sample.append([leg_id, 'house', committee_id, congress, reason])

        # A blank exit cell has nothing to classify.
        if row[12] == '':
            continue
        exit_reason = int(row[12])
        if exit_reason in (2, 3):
            exit_sample.append([leg_id, 'house', committee_id, congress, exit_reason])

    except (KeyError, ValueError, IndexError, TypeError):
        # Unknown committee code, unknown legislator number, or a cell that
        # is not a number.  All of them are counted under the same counter.
        # Other exceptions (Ctrl-C, typos in this script) are left to propagate.
        missing_legislators += 1
        #print('missing ' + str(i) + ' ' + str(row[2]))
        
# House committee assignments, read from the first worksheet of the 96-102 workbook.
sheet = xlrd.open_workbook("./Dropbox/Coalitions/committees/house_assignments_96-102-edited.xls").sheet_by_index(0)

# Rows 0 and 1 are skipped (presumably title/header rows -- confirm against the workbook).
# range() rather than xrange() so the loop runs on Python 2 and Python 3 alike.
for i in range(2, sheet.nrows):
    try:
        row = sheet.row_values(i)
        # Columns used: 0 = congress, 1 = numeric committee code,
        # 2 = legislator number, 4 = assignment reason code, 12 = exit code.
        reason = int(row[4])
        committee_id = committee_map[int(row[1])]
        congress = int(row[0])
        leg_id = legislator_map[int(row[2])]

        # Any resolvable row puts the member on the chamber roster,
        # whichever committee the row is about.
        all_legislators['house'].setdefault(congress, set()).add(leg_id)

        if committee_id not in house_standing_committees:
            continue

        committees.setdefault(committee_id, {}).setdefault(congress, set()).add(leg_id)

        if reason in (4, 5, 6, 7):
            sample.append([leg_id, 'house', committee_id, congress, reason])

        # Skip a blank exit cell, as the loops over the other workbooks do.
        # Without this guard int('') raises and a row that was already fully
        # recorded above is wrongly added to missing_legislators.
        if row[12] == '':
            continue
        exit_reason = int(row[12])
        if exit_reason in (2, 3):
            exit_sample.append([leg_id, 'house', committee_id, congress, exit_reason])
    except (KeyError, ValueError, IndexError, TypeError):
        # Unknown committee code, unknown legislator number, or a cell that
        # is not a number.  All of them are counted under the same counter.
        # Other exceptions (Ctrl-C, typos in this script) are left to propagate.
        missing_legislators += 1
        #print('missing ' + str(i) + ' ' + str(row[2]))
        
# House committee assignments from the fixed-width hrc8097.mit file.
# Character positions used on each line: 14-18 = legislator number,
# 48-49 = congress, 50-52 = numeric committee code, 53 = assignment reason
# code, 84 = exit code.
with open("./Dropbox/Coalitions/committees/hrc8097.mit") as f:
    l = -1  # stays -1 if the file is empty; otherwise the 0-based index of the current line
    # Iterate the file directly instead of readlines(), so it is streamed
    # rather than loaded whole, and let enumerate() keep the line index.
    for l, line in enumerate(f):
        try:
            congress = int(line[48:50].strip())

            # Only the 94th Congress onward is used from this file.
            if congress <= 93:
                continue

            committee_id = committee_map[int(line[50:53].strip())]
            leg_id = legislator_map[int(line[14:19].strip())]
            reason = int(line[53])

            # Any resolvable line puts the member on the chamber roster,
            # whichever committee the line is about.
            all_legislators['house'].setdefault(congress, set()).add(leg_id)

            if committee_id not in house_standing_committees:
                continue

            committees.setdefault(committee_id, {}).setdefault(congress, set()).add(leg_id)

            if reason in (4, 5, 6, 7):
                sample.append([leg_id, 'house', committee_id, congress, reason])

            # NOTE(review): a blank or absent character at position 84 raises
            # here and counts the line as missing even though it was recorded
            # above -- confirm the file always carries a digit there.
            exit_reason = int(line[84])
            if exit_reason in (2, 3):
                exit_sample.append([leg_id, 'house', committee_id, congress, exit_reason])

        except (KeyError, ValueError, IndexError):
            # Unknown committee code, unknown legislator number, a short line
            # or a non-numeric field.  All are counted under the same counter.
            # Other exceptions (Ctrl-C, typos in this script) are left to propagate.
            missing_legislators += 1
            #print(str(l) + ' ' + line[14:19])

print(str(missing_legislators) + ' missing from the House')

# Senate committee assignments, read from the first worksheet of the 103-114 workbook.
sheet = xlrd.open_workbook("./Dropbox/Coalitions/committees/senate_assignments_103-114-edited.xls").sheet_by_index(0)

# Rows 0 and 1 are skipped (presumably title/header rows -- confirm against the workbook).
# range() rather than xrange() so the loop runs on Python 2 and Python 3 alike.
for i in range(2, sheet.nrows):
    try:
        row = sheet.row_values(i)
        # Columns used: 0 = congress, 1 = numeric committee code,
        # 2 = legislator number, 4 = assignment reason code, 12 = exit code.
        reason = int(row[4])
        committee_id = committee_map[int(row[1])]
        congress = int(row[0])
        leg_id = legislator_map[int(row[2])]

        # Any resolvable row puts the member on the chamber roster,
        # whichever committee the row is about.
        all_legislators['senate'].setdefault(congress, set()).add(leg_id)

        if committee_id not in senate_standing_committees:
            continue

        committees.setdefault(committee_id, {}).setdefault(congress, set()).add(leg_id)

        if reason in (4, 5, 6, 7):
            sample.append([leg_id, 'senate', committee_id, congress, reason])

        # A blank exit cell has nothing to classify.
        if row[12] == '':
            continue
        exit_reason = int(row[12])
        if exit_reason in (2, 3):
            exit_sample.append([leg_id, 'senate', committee_id, congress, exit_reason])
    except (KeyError, ValueError, IndexError, TypeError):
        # Unknown committee code, unknown legislator number, or a cell that
        # is not a number.  All are counted under the same counter and
        # reported.  Other exceptions (Ctrl-C, typos in this script) are left
        # to propagate.
        missing_legislators += 1
        print('missing ' + str(i) + ' ' + str(row[2]))
        
# Senate committee assignments from the fixed-width snc80102.mit file.
# Character positions used on each line: 14-18 = legislator number,
# 47-49 = congress, 50-52 = numeric committee code, 53 = assignment reason
# code, 84 = exit code.
with open("./Dropbox/Coalitions/committees/snc80102.mit") as f:
    l = -1  # stays -1 if the file is empty; otherwise the 0-based index of the current line
    # Iterate the file directly instead of readlines(), so it is streamed
    # rather than loaded whole, and let enumerate() keep the line index.
    for l, line in enumerate(f):
        try:
            congress = int(line[47:50].strip())

            # Only the 94th Congress onward is used from this file.
            if congress <= 93:
                continue

            reason = int(line[53])

            # Committee codes that are not in committee_map are skipped
            # quietly and are not counted as missing.  Parse the code once.
            committee_code = int(line[50:53].strip())
            if committee_code not in committee_map:
                continue

            committee_id = committee_map[committee_code]
            leg_id = legislator_map[int(line[14:19].strip())]

            # Any resolvable line puts the member on the chamber roster,
            # whichever committee the line is about.
            all_legislators['senate'].setdefault(congress, set()).add(leg_id)

            if committee_id not in senate_standing_committees:
                continue

            committees.setdefault(committee_id, {}).setdefault(congress, set()).add(leg_id)

            if reason in (4, 5, 6, 7):
                sample.append([leg_id, 'senate', committee_id, congress, reason])

            # NOTE(review): a blank or absent character at position 84 raises
            # here and counts the line as missing even though it was recorded
            # above -- confirm the file always carries a digit there.
            exit_reason = int(line[84])
            if exit_reason in (2, 3):
                exit_sample.append([leg_id, 'senate', committee_id, congress, exit_reason])
        except (KeyError, ValueError, IndexError):
            # Unknown legislator number, a short line or a non-numeric field.
            # All are counted under the same counter and reported.  Other
            # exceptions (Ctrl-C, typos in this script) are left to propagate.
            missing_legislators += 1
            print(str(l) + ' ' + line[14:19])

# The counter was not reset after the House pass, so this is House plus Senate.
print(str(missing_legislators) + ' missing from the total')
